{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.datasets import california_housing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "house_data = california_housing.fetch_california_housing()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "house_data.target"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([   8.3252    ,   41.        ,    6.98412698,    1.02380952,\n",
       "        322.        ,    2.55555556,   37.88      , -122.23      ])"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "house_data.data[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4.526"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "house_data.target[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "trainX,testX,trainY,testY = train_test_split(house_data.data, house_data.target)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LinearRegression, Ridge, ElasticNet, Lasso, RANSACRegressor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "models = [ LinearRegression(), Ridge(alpha=1.0), ElasticNet(), Lasso(alpha=1.0), Lasso(alpha=0.0001)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "models = [ Lasso(alpha=100.0), Lasso(alpha=1.0), Lasso(alpha=0.0001), LinearRegression(), RANSACRegressor()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import mean_absolute_error, mean_squared_error,r2_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "mse 0.9148004363049096\n",
      "r2 -0.00014880550583162844\n",
      "mse 0.7711265153689043\n",
      "r2 0.2875778542446845\n",
      "mse 0.5269605915744405\n",
      "r2 0.6224508408292027\n",
      "mse 0.5269419567130581\n",
      "r2 0.6224503934029577\n",
      "mse 0.5559318525568541\n",
      "r2 -2.873812107684575\n"
     ]
    }
   ],
   "source": [
    "for model in models:\n",
    "    model.fit(trainX,trainY)\n",
    "    pred = model.predict(testX)\n",
    "    print ('mse',mean_absolute_error(y_pred=pred, y_true=testY))\n",
    "    print ('r2',r2_score(y_pred=pred, y_true=testY))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import PolynomialFeatures"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [],
   "source": [
    "pol = PolynomialFeatures(degree=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1., 1., 2., 1., 2., 4.],\n",
       "       [1., 2., 3., 4., 6., 9.]])"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pol.fit_transform([[1,2],[2,3]])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "train-score 0.6003913408247474\n",
      "test-score 0.6224503934029568\n",
      "train-score 0.6800571215627242\n",
      "test-score 0.23924691809870324\n",
      "train-score 0.7285352694571225\n",
      "test-score -118861.3058564364\n",
      "train-score 0.7493199505887782\n",
      "test-score -13830759966.680698\n",
      "train-score 0.7395664267943461\n",
      "test-score -4054318817.485643\n"
     ]
    }
   ],
   "source": [
    "for degree in range(1,6):\n",
    "    pol = PolynomialFeatures(degree=degree)\n",
    "    trx = pol.fit_transform(trainX)\n",
    "    tsx = pol.transform(testX)\n",
    "    lr = LinearRegression()\n",
    "    lr.fit(trx,trainY)\n",
    "    print ('train-score',lr.score(trx,trainY))\n",
    "    print ('test-score',lr.score(tsx,testY))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
